This is the analysis pipeline to analyze data generated from 3D models of fate tracked Montastraea cavernosa infected with stony coral tissue loss disease *** ### All analyses performed with R version 3.5.0
For the following analyses we will require the use of a number of different R packages. Most of which can be sourced from CRAN, but some must be downloaded from GitHub. We can use the following code to ld in the packages and install any packages not previously installed in the R console.
#setwd("link to github")
if (!require("pacman")) install.packages("pacman")
pacman::p_load(ggplot2, stringr, gridExtra, ggpubr, Rmisc, FSA, rcompanion, RColorBrewer, dplyr, vegan, nparcomp, RVAideMemoire, MANOVA.RM, pairwiseAdonis, PMCMR, PMCMRplus, patchwork)
pacman::p_load_gh("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis")
corr <- read.csv("../data/total.correlation.csv", head=T)
str(corr)
head(corr)
# Changing values to cm^2
corr$total.colony.size <- corr$total.colony.size*10000
corr$healthy.area <- corr$healthy.area*10000
corr$disease.area <- corr$disease.area*10000
# I am reordering the dates in my dataset to ensure that they display chronologically.
corr$site=factor(corr$site, levels=unique(corr$site))
levels(corr$date)
corr$date = factor(corr$date, levels(corr$date)[c(3, 4, 1, 2)])
levels(corr$date)
# Subsetting by timepoint, and then resetting the factor level for each timepoint
corr.t1 <- subset(corr, date == '8.24.18')
droplevels(corr.t1$date)
corr.t2 <- subset(corr, date == '9.11.18')
droplevels(corr.t2$date)
corr.t3 <- subset(corr, date == '11.8.18')
droplevels(corr.t3$date)
corr.t4 <- subset(corr, date == '12.17.18')
droplevels(corr.t4$date)
# This will be used when we analyze rates of loss and disease progression
corr.loss <- subset(corr, date != '8.24.18')
droplevels(corr.loss$date)
# Subsetting by Site (you can reset the factor for each site if needed from the code above)
corr.bc1 <- subset(corr, site == 'BC1')
corr.t328 <- subset(corr, site == 'T328')
corr.ftl4 <- subset(corr, site == 'FTL4')
# Average colony size from each site
mean(corr.bc1$total.colony.size)
mean(corr.t328$total.colony.size)
mean(corr.ftl4$total.colony.size)
# Subsetting the first time point from each location
corr.t1.bc1 <- subset(corr, site == "BC1")
corr.t1.t328 <- subset(corr, site == 'T328')
corr.t1.ftl4 <- subset(corr, site == "FTL4")
# Initial average colony size for each site
mean(corr.t1.bc1$total.colony.size)
mean(corr.t1.t328$total.colony.size)
mean(corr.t1.ftl4$total.colony.size)
shapiro.total <- shapiro.test(corr$total.colony.size)
shapiro.total
##
## Shapiro-Wilk normality test
##
## data: corr$total.colony.size
## W = 0.68843, p-value = 5.694e-13
shapiro.healthy <- shapiro.test(corr$healthy.area)
shapiro.healthy
##
## Shapiro-Wilk normality test
##
## data: corr$healthy.area
## W = 0.6942, p-value = 7.679e-13
shapiro.disease <- shapiro.test(corr$disease.area)
shapiro.disease
##
## Shapiro-Wilk normality test
##
## data: corr$disease.area
## W = 0.6443, p-value = 6.517e-14
fried.total <- friedmanTest(y=corr$total.colony.size, groups = corr$date, blocks = corr$colony.id)
fried.total
##
## Friedman rank sum test
##
## data: corr$total.colony.size , corr$date and corr$colony.id
## Friedman chi-squared = 48.555, df = 3, p-value = 1.623e-10
fried.disease<- friedmanTest(y=corr$disease.area, groups = corr$date, blocks = corr$colony.id)
fried.disease
##
## Friedman rank sum test
##
## data: corr$disease.area , corr$date and corr$colony.id
## Friedman chi-squared = 14.017, df = 3, p-value = 0.002882
fried.healthy <- friedmanTest(y=corr$healthy.area, groups = corr$date, blocks = corr$colony.id)
fried.healthy
##
## Friedman rank sum test
##
## data: corr$healthy.area , corr$date and corr$colony.id
## Friedman chi-squared = 41.294, df = 3, p-value = 5.664e-09
fried.lesion <- friedmanTest(y = corr$lesion.count, groups = corr$date, blocks = corr$colony.id)
fried.lesion
##
## Friedman rank sum test
##
## data: corr$lesion.count , corr$date and corr$colony.id
## Friedman chi-squared = 7.4536, df = 3, p-value = 0.05876
# Total colony size
post.total <- posthoc.friedman.nemenyi.test(total.colony.size ~ date | colony.id, data = corr)
summary(post.total)
##
## Pairwise comparisons using Nemenyi multiple comparison test
## with q approximation for unreplicated blocked data
## data: total.colony.size and date and colony.id
## P value adjustment method: none
## H0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#taking pvalues from Nemenyi test and manually correcting them
total.val <- c(0.0135,4.8e-07, 1.1e-09, 0.0875, 0.0044, 0.7458 )
# Using a bonferroni correction
adjust.total.val <- p.adjust(total.val, method = 'bonferroni')
# Pairwise comparisons using Nemenyi multiple comparison test
# with q approximation for unreplicated blocked data
#
# data: total.colony.size and date and colony.id
#
#
# P value adjustment method: none
# H0 statistic p.value adjusted p
# 1 8.24.18 = 9.11.18 4.269075 0.0135 0.018
# 2 8.24.18 = 11.8.18 7.589466 4.8e-07 2.88e-06
# 3 8.24.18 = 12.17.18 9.012491 1.1e-09 6.60e-09
# 4 9.11.18 = 11.8.18 3.320392 0.0875 5.25e-01
# 5 9.11.18 = 12.17.18 4.743416 0.0044 2.64e-02
# 6 11.8.18 = 12.17.18 1.423025 0.7458 1.00e+00
############################################
# Healthy tissue area
post.healthy <- posthoc.friedman.nemenyi.test(healthy.area ~ date | colony.id, data = corr)
summary(post.healthy)
##
## Pairwise comparisons using Nemenyi multiple comparison test
## with q approximation for unreplicated blocked data
## data: healthy.area and date and colony.id
## P value adjustment method: none
## H0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#taking pvalues from Nemenyi test and manually correcting them
healthy.val <- c(0.0044, 2.9e-06, 1.9e-08, 0.3358,0.0497, 0.8078 )
# Using a bonferroni correction
adjust.healthy.val <- p.adjust(healthy.val, method = 'bonferroni')
# Pairwise comparisons using Nemenyi multiple comparison test
# with q approximation for unreplicated blocked data
#
# data: healthy.area and date and colony.id
#
#
# P value adjustment method: none
# H0 statistic p.value padjust
# 1 8.24.18 = 9.11.18 4.743416 0.0044 2.640e-02
# 2 8.24.18 = 11.8.18 7.115125 2.9e-06 1.740e-05
# 3 8.24.18 = 12.17.18 8.380036 1.9e-08 1.140e-07
# 4 9.11.18 = 11.8.18 2.371708 0.3358 1.000e+00
# 5 9.11.18 = 12.17.18 3.636619 0.0497 2.982e-01
# 6 11.8.18 = 12.17.18 1.264911 0.8078 2.982e-01
############################################
#Lesion area
post.disease <- posthoc.friedman.nemenyi.test(disease.area ~ date | colony.id, data = corr)
summary(post.disease)
##
## Pairwise comparisons using Nemenyi multiple comparison test
## with q approximation for unreplicated blocked data
## data: disease.area and date and colony.id
## P value adjustment method: none
## H0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#taking pvalues from Nemenyi test and manually correcting them
disease.val <- c(0.536, 0.970, 0.114, 0.808,0.002,0.037 )
# Using a bonferroni correction
adjust.disease.val <- p.adjust(disease.val, method = 'bonferroni')
# Pairwise comparisons using Nemenyi multiple comparison test
# with q approximation for unreplicated blocked data
#
# data: disease.area and date and colony.id
#
#
# P value adjustment method: none
# H0 statistic p.value padjust
# 1 8.24.18 = 9.11.18 1.8973666 0.536 1.000
# 2 8.24.18 = 11.8.18 0.6324555 0.970 1.000
# 3 8.24.18 = 12.17.18 3.1622777 0.114 0.684
# 4 9.11.18 = 11.8.18 1.2649111 0.808 1.000
# 5 9.11.18 = 12.17.18 5.0596443 0.002 0.012
# 6 11.8.18 = 12.17.18 3.7947332 0.037 0.222
adonis() function in vegan. We will run a total of 999 permutations and test the effects of site, time, and the interaction between the two factors.set.seed(999)
perm <- adonis(formula = corr[c(5:7)] ~ site * date, data = corr, method = "euclidian", perm = 999)
perm
##
## Call:
## adonis(formula = corr[c(5:7)] ~ site * date, data = corr, permutations = 999, method = "euclidian")
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## site 2 318526671 159263335 7.9460 0.15790 0.001 ***
## date 3 13288496 4429499 0.2210 0.00659 0.893
## site:date 6 1839834 306639 0.0153 0.00091 1.000
## Residuals 84 1683620228 20043098 0.83460
## Total 95 2017275229 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
PERMANOVA reveals that Site has a significant effect on tissue areas.
pairwise.adonis() from the package pairwiseAdonis to see where differences are between our sites.set.seed(999)
perm.pair <- pairwise.adonis(corr[5:7],factors=corr$site,
sim.method='euclidian',p.adjust.m='bonferroni', perm = 999)
perm.pair
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 BC1 vs T328 1 313927982 13.154125 0.18486806 0.001 0.003 *
## 2 BC1 vs FTL4 1 48874191 9.027413 0.12032153 0.008 0.024 .
## 3 T328 vs FTL4 1 131553695 4.925368 0.07359494 0.028 0.084
BC1 is significantly different from both T328 and FTL4.
# Looking at correlation between total colony size and disease lesion area
colonysize.vs.lesionarea <- cor.test(corr$total.colony.size,corr$disease.area, method="spearman", use="complete.obs")
colonysize.vs.lesionarea
##
## Spearman's rank correlation rho
##
## data: corr$total.colony.size and corr$disease.area
## S = 125790, p-value = 0.1535
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.1468166
# Looking at correlation between total colony size and the number of disease lesions on a colony
colonysize.vs.lesioncount<- cor.test(corr$total.colony.size,corr$lesion.count, method="spearman", use="complete.obs")
colonysize.vs.lesioncount
##
## Spearman's rank correlation rho
##
## data: corr$total.colony.size and corr$lesion.count
## S = 120610, p-value = 0.07599
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.1819743
# Looking at correlation between the amount of lesion tissue area and number of disease lesions
lesionarea.vs.lesioncount <- cor.test(corr$disease.area,corr$lesion.count, method="spearman", use="complete.obs")
lesionarea.vs.lesioncount
##
## Spearman's rank correlation rho
##
## data: corr$disease.area and corr$lesion.count
## S = 48640, p-value = 8.23e-14
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6701035
The number of disease lesions were positively correlated with the amount of disease lesion tissue area. However, colony size was not correlated with disease lesion area or the number of disease lesions. So, bigger colonies have no more (or less) disease than smaller colonies.
kruskal.test(lesion.count ~ site, data = corr)
##
## Kruskal-Wallis rank sum test
##
## data: lesion.count by site
## Kruskal-Wallis chi-squared = 17.076, df = 2, p-value = 0.0001958
dunnTest(lesion.count ~ site, data = corr, method = 'bh')
## Dunn (1964) Kruskal-Wallis multiple comparison
## p-values adjusted with the Benjamini-Hochberg method.
## Comparison Z P.unadj P.adj
## 1 BC1 - FTL4 0.7428398 4.575787e-01 0.4575786786
## 2 BC1 - T328 -3.1580994 1.588014e-03 0.0023820209
## 3 FTL4 - T328 -3.9595527 7.509028e-05 0.0002252708
# This forces y axis labels to have 1 decimal place.
scale <- function(x) sprintf("%.0f", x)
# Plotting a boxplot of mean total colony size for each site at each timepoint.
total.colony.box.1 <-
ggplot(data = corr, aes(x = date, y = total.colony.size))+
geom_boxplot(aes(fill = site), alpha = 1, outlier.shape = NA, color = "black") +
geom_point(aes(fill = site), color = "black", size = 3, position = position_jitterdodge()) +
scale_fill_manual(values = c("#7BA46C", "#EACF9E", "#008D91")) +
scale_x_discrete(labels = c(expression('T'[1]), expression('T'[2]), expression('T'[3]), expression('T'[4])))+
labs(x = "Time", y = bquote("Total Colony Area" ~ (cm^2)),fill = 'Site') +
scale_y_continuous(labels = scale)+
theme(legend.title = element_blank(),
legend.text = element_text(color = "black", size = 25),
legend.background = element_blank())+
facet_grid(.~site,scales="free")
total.colony.box.1
total.colony.box <- total.colony.box.1 + theme(
panel.grid.major = element_line(size = 0.5, linetype = 'solid', colour = "white"),
panel.background = element_rect(fill = '#F5F5F5'),
plot.title = element_text(hjust = 0.5),
axis.line = element_line(colour = "black"),
axis.ticks = element_line(color="black"),
text = element_text(size=15, color="black"),
axis.text.x=element_text(size=12, color="black"),
axis.text.y=element_text(size=12, color="black"),
axis.title.y = element_text(size = 18, color = 'black'),
plot.margin = unit(c(0.5, 1, 0.5, 1), "cm"))+
rremove("legend")
total.colony.box
We repeat this using healthy tissue area, disease lesion area and lesion count.
# No correlation
lesion.count.colony.size.1 <-ggplot(corr, aes(x = total.colony.size, y = lesion.count, color = site))+
geom_point(size = 5)+
scale_color_manual(values = c("#7BA46C", "#EACF9E", "#008D91"))+
labs(x = bquote('Total Colony Size'~(cm^2)), y = bquote('Cattle Tag Error'~(cm^2)))+
scale_y_continuous(labels = scale)
lesion.count.colony.size <- lesion.count.colony.size.1 +
theme(panel.grid.major = element_line(size = 0.5, linetype = 'solid', colour = "white"), panel.background = element_rect(fill = '#F5F5F5'),
plot.title = element_text(hjust = 0.5),
axis.line = element_line(colour = "black"),
axis.ticks = element_line(color="black"),
text = element_text(size=15, color="black"),
axis.text.x=element_text(size=12, color="black"),
axis.text.y=element_text(size=12, color="black"),
axis.title.y = element_text(size = 18, color = 'black'),
plot.margin = unit(c(0.5, 1, 0.5, 1.75), "cm"))+
rremove("legend")
lesion.count.colony.size
# Significant Correlation
lesion.count.disease.area.1 <-ggplot(corr, aes(x = disease.area, y = lesion.count, color = site))+
geom_point(size = 5)+
geom_smooth(method = "lm", fill = 'grey30', color = 'grey30')+
scale_color_manual(values = c("#7BA46C", "#EACF9E", "#008D91"))+
labs(x = bquote('Disease Lesion Area'~(cm^2)), y = "Lesion Count") +
scale_y_continuous(labels = scale)
lesion.count.disease.area.1
lesion.count.disease.area <- lesion.count.disease.area.1 +
theme(panel.grid.major = element_line(size = 0.5, linetype = 'solid', colour = "white"), panel.background = element_rect(fill = '#F5F5F5'),
plot.title = element_text(hjust = 0.5),
axis.line = element_line(colour = "black"),
axis.ticks = element_line(color="black"),
text = element_text(size=15, color="black"),
axis.text.x=element_text(size=12, color="black"),
axis.text.y=element_text(size=12, color="black"),
axis.title.y = element_text(size = 18, color = 'black'),
plot.margin = unit(c(0.5, 1, 0.5, 1.57), "cm"))+
rremove("legend")
lesion.count.disease.area
lesion.plots.patch <- total.colony.box + healthy.colony.box + disease.area.box + lesion.count.box + colony.size.disease.area + lesion.count.disease.area + lesion.count.colony.size + guide_area() + plot_layout(nrow = 4, guides = 'collect')+plot_annotation(tag_levels = 'a')
## Save
ggsave("../figures/lesion.plots.png", plot= lesion.plots.patch, width=20, height=20, units="in", dpi=600)
***
## Rate of loss 1 and total colony size at T2
cor.test(corr.t2$loss,corr.t2$total.colony.size, method="spearman", use="complete.obs")
##
## Spearman's rank correlation rho
##
## data: corr.t2$loss and corr.t2$total.colony.size
## S = 2862, p-value = 0.2487
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.2443478
# Rate of loss 2 and total colony size at T3
cor.test(corr.t3$loss,corr.t3$total.colony.size, method="spearman", use="complete.obs")
##
## Spearman's rank correlation rho
##
## data: corr.t3$loss and corr.t3$total.colony.size
## S = 2296, p-value = 0.9936
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.001739509
# Rate of loss 3 and total colony size at T4
cor.test(corr.t4$loss,corr.t4$total.colony.size, method="spearman", use="complete.obs")
##
## Spearman's rank correlation rho
##
## data: corr.t4$loss and corr.t4$total.colony.size
## S = 2161.8, p-value = 0.7804
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.0600653
#subset
corr.loss <- subset(corr, date != "8.24.18")